library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Show the directory the session starts in.
getwd()
## [1] "C:/Users/kchz694/OneDrive - AZCollaboration/Desktop/R_DataAnalytics_May2022/IP_R Code"
# Switch the session to the TV_RMD folder, then print the working directory
# again to confirm the change.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs on this machine. Consider project-relative paths instead - confirm
# against the data-reading step before changing.
tv_rmd_dir <- "C:/Users/kchz694/OneDrive - AZCollaboration/From Desktop 8-5-20/RT R Scripts/R_scripts/TV_RMD"
setwd(tv_rmd_dir)
getwd()
## [1] "C:/Users/kchz694/OneDrive - AZCollaboration/From Desktop 8-5-20/RT R Scripts/R_scripts/TV_RMD"
Read the CSV file from the folder.
## Rows: 21 Columns: 85
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (85): Day, gp1_1, gp1_2, gp1_3, gp1_4, gp1_5, gp1_6, gp2_1, gp2_2, gp2_3...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 x 85
## Day gp1_1 gp1_2 gp1_3 gp1_4 gp1_5 gp1_6 gp2_1 gp2_2 gp2_3 gp2_4 gp2_5 gp2_6
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 22 240. 106. 166. 111. 172. 147. 151. 241. 106. 111. 131. 178.
## 2 26 257. 125. 203. 140. 201. 217. 223. 243. 152. 191. 193. 230.
## 3 29 270. 231. 306. 204. 329. 281. 274. 306. 229. 206. 238. 257.
## 4 33 346. 306. 482. 268. 368. 423. 339. 474. 247. 220. 282. 283.
## 5 36 378. 364. 513. 273. 446. 465. 393. 550. 277. 306. 320. 341.
## 6 40 459. 374. 769. 316. 382. 676. 607. 645. 341. 308. 524. 505.
## # ... with 72 more variables: gp3_1 <dbl>, gp3_2 <dbl>, gp3_3 <dbl>,
## # gp3_4 <dbl>, gp3_5 <dbl>, gp3_6 <dbl>, gp4_1 <dbl>, gp4_2 <dbl>,
## # gp4_3 <dbl>, gp4_4 <dbl>, gp4_5 <dbl>, gp4_6 <dbl>, gp5_1 <dbl>,
## # gp5_2 <dbl>, gp5_3 <dbl>, gp5_4 <dbl>, gp5_5 <dbl>, gp5_6 <dbl>,
## # gp6_1 <dbl>, gp6_2 <dbl>, gp6_3 <dbl>, gp6_4 <dbl>, gp6_5 <dbl>,
## # gp6_6 <dbl>, gp7_1 <dbl>, gp7_2 <dbl>, gp7_3 <dbl>, gp7_4 <dbl>,
## # gp7_5 <dbl>, gp7_6 <dbl>, gp8_1 <dbl>, gp8_2 <dbl>, gp8_3 <dbl>, ...
Restructure the data from wide format to long (tidy) format using pivot_longer(). The column names gp1_1, gp1_2, etc. go into grp (a new variable, which is split in the next step), and the tumor values go into tv.
# Reshape tv2 from wide to long format: every column except Day is stacked,
# with the old column name (e.g. "gp1_1") stored in `grp` and the measured
# value stored in `tv`.
# The columns are selected by name (-Day) rather than by position (2:85), so
# the reshape keeps working if measurement columns are added or removed.
tv2 <- tv2 %>%
  pivot_longer(cols = -Day, names_to = "grp", values_to = "tv")
# Preview the first rows of the reshaped data.
head(tv2)
## # A tibble: 6 x 3
## Day grp tv
## <dbl> <chr> <dbl>
## 1 22 gp1_1 240.
## 2 22 gp1_2 106.
## 3 22 gp1_3 166.
## 4 22 gp1_4 111.
## 5 22 gp1_5 172.
## 6 22 gp1_6 147.
Now use separate() to split grp into grp and replicate (the replicate number). The split happens at the underscore (_), which is removed.
# Split the combined label (e.g. "gp1_1") at the underscore into the group
# ("gp1") and the replicate ("1"); the underscore itself is dropped.
tv2 <- separate(
  tv2,
  col = grp,
  into = c("grp", "replicate"),
  sep = "_"
)
# Show the first ten rows to check the split.
head(tv2, n = 10)
## # A tibble: 10 x 4
## Day grp replicate tv
## <dbl> <chr> <chr> <dbl>
## 1 22 gp1 1 240.
## 2 22 gp1 2 106.
## 3 22 gp1 3 166.
## 4 22 gp1 4 111.
## 5 22 gp1 5 172.
## 6 22 gp1 6 147.
## 7 22 gp2 1 151.
## 8 22 gp2 2 241.
## 9 22 gp2 3 106.
## 10 22 gp2 4 111.
Remove the NA values that came from the blank lines in the original data.
# Keep only the rows that have a measurement, i.e. drop rows where tv is NA.
tv2 <- filter(tv2, !is.na(tv))
# Preview the cleaned data.
head(tv2)
## # A tibble: 6 x 4
## Day grp replicate tv
## <dbl> <chr> <chr> <dbl>
## 1 22 gp1 1 240.
## 2 22 gp1 2 106.
## 3 22 gp1 3 166.
## 4 22 gp1 4 111.
## 5 22 gp1 5 172.
## 6 22 gp1 6 147.
This shows how paste0() works: unlike paste(), paste0() does not put a space between the two parts.
# Demonstration: paste0() joins "gp" to each of 1..14 with no separator.
paste0("gp", seq_len(14))
## [1] "gp1" "gp2" "gp3" "gp4" "gp5" "gp6" "gp7" "gp8" "gp9" "gp10"
## [11] "gp11" "gp12" "gp13" "gp14"
We need to convert grp into a factor, because otherwise the legend shows the groups in alphabetical order (e.g. gp1, gp10, gp11, gp2, etc.). To avoid this confusion, set the level order explicitly with factor(): the levels are built by pasting "gp" onto an ordered numeric vector.
# Turn grp into a factor whose levels follow numeric order (gp1, gp2, ...,
# gp14) rather than the alphabetical order of the character labels.
tv2 <- mutate(
  tv2,
  grp = factor(grp, levels = paste0("gp", seq_len(14)))
)
# Preview the result; grp is now a factor column.
head(tv2)
## # A tibble: 6 x 4
## Day grp replicate tv
## <dbl> <fct> <chr> <dbl>
## 1 22 gp1 1 240.
## 2 22 gp1 2 106.
## 3 22 gp1 3 166.
## 4 22 gp1 4 111.
## 5 22 gp1 5 172.
## 6 22 gp1 6 147.
Add summarisation to geom_line(). Use stat_summary() to add the error bars and points; when no summary function is supplied, it defaults to the mean and its standard error (SEM).
# Plot tv against Day, one colour per group: a line through the group means,
# plus error bars and points.
# The stat_summary() layers are given no summary function, so ggplot2 falls
# back to mean_se() for both the error bars and the points.
tv2 %>%
  ggplot(aes(x = Day, y = tv, color = grp)) +
  geom_line(stat = "summary", fun = "mean") +
  stat_summary(geom = "errorbar", width = 2.0) +
  # Points must be added through stat_summary() so that they are summarised
  # too. `width` is not a point parameter (ggplot2 warned "Ignoring unknown
  # parameters: width" and dropped it), so it is not passed here.
  stat_summary(geom = "point") +
  # Remove the grid lines and panel background; draw black axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Limit the visible y range to 0-2000.
  coord_cartesian(ylim = c(0, 2000)) +
  scale_x_continuous(breaks = c(20, 40, 60, 80, 100, 120, 140)) +
  xlab("Days") +
  ylab("TV mm3")
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
Put all the code in one place, store the plot in an object, and use it with plotly. Note: geom_line(stat = "summary", fun = "mean") + needs to come after the stat_summary() error-bar and point layers in order for the lines to show in the graph.
# Build the same summary plot and store it in `rtplot` so it can be reused
# (e.g. converted with plotly).
# The stat_summary() layers are given no summary function, so ggplot2 falls
# back to mean_se() for both the error bars and the points.
rtplot <- tv2 %>%
  ggplot(aes(x = Day, y = tv, color = grp)) +
  stat_summary(geom = "errorbar", width = 2.0) +
  # Points must be added through stat_summary() so that they are summarised
  # too. `width` is not a point parameter (ggplot2 warned "Ignoring unknown
  # parameters: width" and dropped it), so it is not passed here.
  stat_summary(geom = "point") +
  # The mean line is added after the error-bar and point layers; per the
  # author's note this order is needed for the lines to show in the graph.
  geom_line(stat = "summary", fun = "mean") +
  # Remove the grid lines and panel background; draw black axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Limit the visible y range to 0-2000.
  coord_cartesian(ylim = c(0, 2000)) +
  scale_x_continuous(breaks = c(20, 40, 60, 80, 100, 120, 140)) +
  xlab("Days after Tumor Cells Implant") +
  ylab("TV mm3")
# Print the stored plot.
rtplot
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Convert the stored ggplot object into a plotly object.
rtplot1 <- plotly::ggplotly(p = rtplot)
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
# Display the converted plot.
rtplot1